{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据流例子"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install faker\n",
    "import sys\n",
    "sys.path.append(\"../\")\n",
    "\n",
    "from fppy.lazy_list import LazyList\n",
    "from faker import Faker\n",
    "from IPython.display import display\n",
    "from datetime import datetime as dt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 演示如何使用`Faker`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Mozilla/5.0 (iPad; CPU iPad OS 9_3_6 like Mac OS X) AppleWebKit/533.1 (KHTML, like Gecko) CriOS/24.0.867.0 Mobile/77X748 Safari/533.1'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'2021-11-09 16:40:44'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "44"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'Joshua Nielsen'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fk = Faker()\n",
    "display(fk.chrome())\n",
    "display(fk.date_time_between_dates().strftime(\"%Y-%m-%d %H:%M:%S\"))\n",
    "display(fk.random_int(14, 50))\n",
    "display(fk.name())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 制作一个生成假数据的生成器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def data_stream_gen():\n",
    "    \"\"\"数据流生成器\n",
    "    \"\"\"\n",
    "    fk = Faker()\n",
    "    Faker.seed(0) # 为了让下面的显示结果一致\n",
    "    while True:\n",
    "        yield {\n",
    "            \"create_time\": (\n",
    "                fk\n",
    "                .date_time_between_dates()\n",
    "                .strftime(\"%Y-%m-%d %H:%M:%S\")),\n",
    "            \"name\": fk.name(),\n",
    "            \"email\": fk.email(),\n",
    "            \"age\": fk.random_int(14, 50),\n",
    "            \"address\": fk.address(),\n",
    "            \"device\": {\n",
    "                \"browser\": fk.chrome()\n",
    "            }\n",
    "        }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看是不是可用："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'create_time': '2021-11-09 16:47:38',\n",
       "  'name': 'Jennifer Green',\n",
       "  'email': 'ysullivan@example.com',\n",
       "  'age': 44,\n",
       "  'address': '242 Christine Glen\\nWest Corey, TX 43780',\n",
       "  'device': {'browser': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/46.0.833.0 Safari/534.2'}},\n",
       " {'create_time': '2021-11-09 16:47:38',\n",
       "  'name': 'Amy Stark',\n",
       "  'email': 'johnponce@example.net',\n",
       "  'age': 42,\n",
       "  'address': '714 Mann Plaza Suite 839\\nSeanfurt, OK 32234',\n",
       "  'device': {'browser': 'Mozilla/5.0 (iPad; CPU iPad OS 10_3_4 like Mac OS X) AppleWebKit/533.0 (KHTML, like Gecko) CriOS/28.0.827.0 Mobile/32P094 Safari/533.0'}},\n",
       " {'create_time': '2021-11-09 16:47:38',\n",
       "  'name': 'Joanne Keller',\n",
       "  'email': 'christopher91@example.com',\n",
       "  'age': 35,\n",
       "  'address': '41352 Simmons Circle\\nPort Dustinbury, OK 83627',\n",
       "  'device': {'browser': 'Mozilla/5.0 (Linux; Android 4.0.2) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/26.0.893.0 Safari/535.2'}}]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "LazyList(data_stream_gen()).take(3).collect()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 制作一些处理流数据的函数\n",
    "\n",
    "\n",
    "1. `drop_device`：删除设备信息\n",
    "2. `user_info`：将所有用户信息归并在一起\n",
    "3. `change_create_time`：修改时间格式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def drop_device(x):\n",
    "    \"\"\"删除device信息\n",
    "    \"\"\"\n",
    "    return { k:v for k, v in x.items() if k != 'device'}\n",
    "\n",
    "def user_info(x):\n",
    "    \"\"\"将用户信息包在一起\n",
    "    \"\"\"\n",
    "    user_info_name = {\"name\", \"email\", \"age\", \"adress\"}\n",
    "    return {**{\n",
    "        'user_info': {\n",
    "            k:v\n",
    "            for k, v in x.items() if k in user_info_name\n",
    "        },\n",
    "    }, **{k: v for k, v in x.items() if k not in user_info_name}}\n",
    "\n",
    "def change_create_time(x):\n",
    "    \"\"\"修改日期格式\n",
    "    \"\"\"\n",
    "    res = x.copy()\n",
    "    res['create_time'] = dt.strptime(\n",
    "        res['create_time'],\n",
    "        \"%Y-%m-%d %H:%M:%S\"\n",
    "    ).timestamp()\n",
    "    return res"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 测试处理结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'user_info': {'name': 'Jennifer Green',\n",
       "   'email': 'ysullivan@example.com',\n",
       "   'age': 44},\n",
       "  'create_time': 1636447661.0,\n",
       "  'address': '242 Christine Glen\\nWest Corey, TX 43780'},\n",
       " {'user_info': {'name': 'Amy Stark',\n",
       "   'email': 'johnponce@example.net',\n",
       "   'age': 42},\n",
       "  'create_time': 1636447661.0,\n",
       "  'address': '714 Mann Plaza Suite 839\\nSeanfurt, OK 32234'},\n",
       " {'user_info': {'name': 'Joanne Keller',\n",
       "   'email': 'christopher91@example.com',\n",
       "   'age': 35},\n",
       "  'create_time': 1636447661.0,\n",
       "  'address': '41352 Simmons Circle\\nPort Dustinbury, OK 83627'}]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "LazyList(data_stream_gen())\\\n",
    "    .map(drop_device)\\\n",
    "    .map(user_info)\\\n",
    "    .map(change_create_time)\\\n",
    "    .take(3)\\\n",
    "    .collect()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "b00d0533784e0bd7ec39dc1f12727fcf9021e9a985583666278f4fb55e7dfcf4"
  },
  "kernelspec": {
   "display_name": "Python 3.10.0 64-bit ('fppy2': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
